Overview

This tutorial demonstrates how to use CoCo-ST (Compare and Contrast Spatial Transcriptomics) to identify both high-variance and low-variance spatial domains in Visium spot data. We will walk through data loading, normalization, contrastive learning, spatial domain detection, and deconvolution using scRNA-seq references.

Load Required Libraries

We first load all dependencies required for spatial analysis, visualization, and deconvolution.

# Load required libraries
library(Seurat)
library(ggplot2)
library(patchwork)
library(dplyr)
library(kernlab)
library(EnhancedVolcano)
library(CoCoST)  # Or source("Codes/CoCoST_function.R") if local
library(Matrix) 
library(spacexr)
library(slingshot)

# Increase memory limit for large datasets
options(future.globals.maxSize = 2 * 1024^6)  # 2 GB

Load and Normalize Visium Spatial Data

We load background (normal) and foreground (tumor) Visium datasets

# Load background (normal) Visium data
normalTissue <- Load10X_Spatial(
  data.dir = "Y:/Projects/FM_ST/Data/Visium/Raw/53430/outs",
  filename = "filtered_feature_bc_matrix.h5",
  assay = "Spatial"
)
normalTissue <- SCTransform(normalTissue, assay = "Spatial", verbose = FALSE)

# Load foreground (tumor) Visium data
abnormalTissue <- Load10X_Spatial(
  data.dir = "Y:/Projects/FM_ST/Data/Visium/Raw/53433/outs",
  filename = "filtered_feature_bc_matrix.h5",
  assay = "Spatial"
)
abnormalTissue <- SCTransform(abnormalTissue, assay = "Spatial", verbose = FALSE)

Visualize Tissue Coverage

Visualize the total number of detected molecules (UMIs) across the tissue to confirm that the spatial data were loaded correctly.

# Visualize total counts
SpatialFeaturePlot(abnormalTissue, pt.size.factor = 3, features = "nCount_Spatial") +
  ggtitle("Foreground (Tumor) Spot Counts")

Extract Scaled Expression Matrices

We extract SCT-normalized expression data for both tumor and normal tissues. These matrices will be used as inputs for CoCo-ST.

# Extract scaled expression matrices
fdata <- abnormalTissue@assays[["SCT"]]@scale.data
bdata <- normalTissue@assays[["SCT"]]@scale.data

Construct Affinity Matrices

We build similarity (affinity) matrices between spots using the Laplacian kernel, which captures local relationships in spatial gene expression. Note, there are different ways of constructing such matrices

# construct affinity matrices
rbf <- laplacedot(sigma = 0.50)

fKernel <- kernelMatrix(rbf, t(fdata))
Wf <- fKernel@.Data

bKernel <- kernelMatrix(rbf, t(bdata))
Wb <- bKernel@.Data

Run CoCo-ST

Apply CoCo-ST to extract contrastive components that highlight structures unique to the tumor foreground relative to the normal tissue background.

# Set parameters
para <- 0.1   # contrastive parameter
Dim  <- 20    # number of contrastive components

# Run CoCoST 
CoCoModel <- CoCoST(t(fdata), Wf, t(bdata), Wb, para, Dim)

# Create Seurat reduction object
CoCo <- CreateDimReducObject(
  embeddings = CoCoModel[["fgComponents"]],
  loadings   = CoCoModel[["projMatrix"]],
  key        = "CoCoST_",
  assay      = "SCT"
)

# Add CoCoST components to Seurat object
rownames(CoCo@feature.loadings) <- rownames(fdata)
abnormalTissue@reductions[["CoCoST"]] <- CoCo

Perform Dimensionality Reduction and Clustering

We embed CoCo-ST features into UMAP space, identify spatial domains using graph-based clustering, and visualize the detected spatial domains.

# Run UMAP and clustering based on CoCoST embeddings
abnormalTissue <- RunUMAP(abnormalTissue, reduction = "CoCoST", dims = 1:10, 
                          n.components = 5, reduction.name = "CoCo_UMAP")
abnormalTissue <- FindNeighbors(abnormalTissue, reduction = "CoCo_UMAP", dims = 1:5)
abnormalTissue <- FindClusters(abnormalTissue, verbose = TRUE, resolution = 0.05)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3511
## Number of edges: 80095
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9811
## Number of communities: 6
## Elapsed time: 0 seconds
abnormalTissue@meta.data[["coco_clusters"]]  <- abnormalTissue@meta.data[["SCT_snn_res.0.05"]]

Visualize Clustering and Spatial Domains

Display UMAP and spatial domain plots, showing how CoCo-ST separates spatially coherent regions.

cols <- c("#E1BD6D","deepskyblue1","#7A3A9A","#ED0000FF","#0B775E","#ff00ff","#7B556C","#44B05B")
DimPlot(abnormalTissue, reduction = "CoCo_UMAP", label = FALSE, cols = cols)

# Visualize spatial domains
SpatialDimPlot(abnormalTissue, label = FALSE, label.size = 3, group.by = "coco_clusters", pt.size.factor = 3) +  
  scale_fill_manual(values=cols) +
  ggtitle("CoCoST Spatial Domains")

Rename and Visualize Spatial Domains

Assign interpretable names to spatial domains (SD1–SD6) and replot.

abnormalTissue <- RenameIdents(object = abnormalTissue, `0` = "SD 1", `1` = "SD 3", `2` = "SD 2", `3` = "SD 4",
                               `4` = "SD 5", `5` = "SD 6")

SpatialDimPlot(abnormalTissue, label = FALSE, label.size = 3, pt.size.factor = 3) +  
  scale_fill_manual(values=cols)

Visualize CoCo-ST Components

Display the top contrastive components (CoCoST_1–CoCoST_5) to explore spatially varying patterns.

SpatialFeaturePlot(abnormalTissue, features = c("CoCoST_1", "CoCoST_2", "CoCoST_3", "CoCoST_4", "CoCoST_5"), ncol = 5, 
                   pt.size.factor = 3)

Perform Cell Type Deconvolution

Next, we use RCTD to infer the dominant cell types within each spatial domain using a scRNA-seq reference.

# perform deconvolution
ref <- readRDS("Y:/Projects/FM_ST/Data/129S4_Major_CellTypes.rds")
ref <- UpdateSeuratObject(ref)
subref <- subset(ref, model == "Urethane")
Idents(subref) <- "Major_CellType"
subref@meta.data[["Major_CellType"]] <- factor(subref@meta.data[["Major_CellType"]], 
                                               levels = c("Endothelial cells","Epithelial cells","Fibroblasts",
                                                          "Macrophages","cDC","Prolif_Macro","B cells","T cells",
                                                          "Proliferating T cells","pDC","Neutrophils","Plasma cells",
                                                          "Monocytes","NK cells"))

Visualize Reference Cell Types

Display the UMAP of reference scRNA-seq cell types for interpretability.

# Plot scRNA-seq celltypes (umap) 
col2 = c("plum1","tomato","#762A83","deepskyblue1","#C4961A","#ff00ff","#DC0000FF","#4E84C4","chartreuse3",
                "#D16103","#58593FFF","lightblue1","#068105","yellow","#4E2A1E","#C3D7A4","black")
                
DimPlot(subref, group.by = "Major_CellType", label = F, cols = col2)

Run RCTD

Use RCTD to deconvolve each Visium spot into its most probable cell type compositions.

# extract information to pass to the RCTD Reference function
counts <- subref@assays[["RNA"]]@counts
cluster <- as.factor(subref@meta.data[["Major_CellType"]])
names(cluster) <- colnames(subref)
nUMI <- subref@meta.data[["nCount_RNA"]]
names(nUMI) <- colnames(subref)
reference <- Reference(counts, cluster, nUMI)

# set up query with the RCTD function SpatialRNA
STcounts <- abnormalTissue@assays[["SCT"]]@counts
colnames(STcounts) <- colnames(abnormalTissue)
rownames(STcounts) <- rownames(abnormalTissue)
coords <- GetTissueCoordinates(abnormalTissue)
colnames(coords) <- c("x", "y")
coords[is.na(colnames(coords))] <- NULL
query <- SpatialRNA(coords, STcounts, colSums(STcounts))

We now run RCTD

RCTD <- create.RCTD(query, reference, max_cores = 1)
## 
##     Endothelial cells      Epithelial cells           Fibroblasts 
##                  7989                 10000                  9730 
##           Macrophages                   cDC          Prolif_Macro 
##                 10000                  2315                   452 
##               B cells               T cells Proliferating T cells 
##                  3469                  6585                   159 
##                   pDC           Neutrophils          Plasma cells 
##                   178                  2434                   117 
##             Monocytes              NK cells 
##                  3409                   757
RCTD <- run.RCTD(RCTD, doublet_mode = "doublet") # previously used doublet mode
## [1] "gather_results: finished 1000"
## [1] "gather_results: finished 2000"
## [1] "gather_results: finished 3000"
abnormalTissue <- AddMetaData(abnormalTissue, metadata = RCTD@results$results_df)
abnormalTissue@meta.data[["second_type"]] <- factor(abnormalTissue@meta.data[["second_type"]], 
                                                    levels = c("Endothelial cells","Epithelial cells","Fibroblasts",
                                                               "Macrophages","cDC","Prolif_Macro","B cells","T cells",
                                                               "Proliferating T cells","pDC","Neutrophils","Plasma cells",
                                                               "Monocytes","NK cells"))
abnormalTissue@meta.data[["first_type"]] <- factor(abnormalTissue@meta.data[["first_type"]], 
                                                    levels = c("Endothelial cells","Epithelial cells","Fibroblasts",
                                                               "Macrophages","cDC","Prolif_Macro","B cells","T cells",
                                                               "Proliferating T cells","pDC","Neutrophils","Plasma cells",
                                                               "Monocytes","NK cells"))

Visualize Deconvolution Results

Finally, visualize dominant and secondary cell type distributions across the spatial tissue using RCTD outputs

SpatialDimPlot(abnormalTissue, pt.size.factor = 3, group.by = "first_type") + scale_fill_manual(values=col2)+
  theme(
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  ) +
  guides(
    fill = guide_legend(override.aes = list(size = 3)),  # for filled shapes
    color = guide_legend(override.aes = list(size = 3))  # for colored points
  )

SpatialDimPlot(abnormalTissue, pt.size.factor = 3, group.by = "second_type") + scale_fill_manual(values=col2)+
  theme(
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  ) +
  guides(
    fill = guide_legend(override.aes = list(size = 3)),  # for filled shapes
    color = guide_legend(override.aes = list(size = 3))  # for colored points
  )

## Trajectory Inference (tissue-wise) Build SingleCellExperiment using CoCoST contrastive components and CoCo-ST clusters. Then run Slingshot using the normal region (cluster “0”) as the start.

# perform trajectory analysis (tissue wise)
sim <- SingleCellExperiment(assays = STcounts)
reducedDims(sim) <- SimpleList(DRM = CoCoModel[["fgComponents"]])
colData(sim)$clusterlabel <- factor(abnormalTissue@meta.data[["coco_clusters"]])    
sim  <- slingshot(sim, clusterLabels = 'clusterlabel', reducedDim = 'DRM', start.clus="0") 

Below is a helper function to plot trajectories on tissue sample The function divides tissue into grids, computes pseudotime direction per grid, and visualizes trajectories as arrows over pseudotime and cluster maps.

plotTrajectory = function(pseudotime, location, clusterlabels, gridnum,
                          color_in, pointsize = 5, arrowlength = 0.4,
                          arrowsize = 0.8, textsize = 22) {
  
  pseudotime_use = pseudotime
  info = as.data.frame(location)
  colnames(info) = c("sdimx","sdimy")
  grids = gridnum
  
  min_x = min(info$sdimx)
  min_y = min(info$sdimy)
  max_x = max(info$sdimx)
  max_y = max(info$sdimy)
  
  # grid anchors
  x_anchor = seq(min_x, max_x, length.out = grids + 1)
  y_anchor = seq(min_y, max_y, length.out = grids + 1)
  
  # store start/end coordinates
  start_x_dat = c()
  start_y_dat = c()
  end_x_dat = c()
  end_y_dat = c()
  
  count = 0
  for(num_x in 1:grids){
    for(num_y in 1:grids){
      filter_x = which(info$sdimx >= x_anchor[num_x] & info$sdimx <= x_anchor[num_x+1])
      filter_y = which(info$sdimy >= y_anchor[num_y] & info$sdimy <= y_anchor[num_y+1])
      points_in_grid = intersect(filter_x, filter_y)
      
      if(length(points_in_grid) > 1){
        count = count + 1
        min_point = info[points_in_grid[which.min(pseudotime_use[points_in_grid])], ]
        max_point = info[points_in_grid[which.max(pseudotime_use[points_in_grid])], ]
        start_x_dat[count] = min_point$sdimx
        start_y_dat[count] = min_point$sdimy
        end_x_dat[count]   = max_point$sdimx
        end_y_dat[count]   = max_point$sdimy
      }
    }
  }
  
  loc1 = info$sdimx
  loc2 = info$sdimy
  time = pseudotime_use + 0.01
  
  # === Pseudotime scatter ===
  datt = data.frame(time, loc1, loc2)
  p01 = ggplot(datt, aes(x = loc1, y = loc2, color = time)) +
    geom_point(alpha = 1, size = pointsize) +
    scale_color_gradientn(colours = c("red", "green")) +
    theme_void() +
    theme(plot.title = element_text(size = textsize, face = "bold"),
          text = element_text(size = textsize),
          legend.position = "bottom")
  
  # Arrow style
  arrow_style <- arrow(length = unit(arrowlength,"cm"),
                       type = "closed", ends = "last")
  
  datt2 = data.frame(start_x_dat, start_y_dat, end_x_dat, end_y_dat)
  
  # Arrows only
  p02 = ggplot(datt2) +
    geom_segment(aes(x = start_x_dat, y = start_y_dat,
                     xend = end_x_dat, yend = end_y_dat),
                 arrow = arrow_style, size = arrowsize,
                 color = "grey20") +
    theme_void()
  
  p03 = ggplot(datt2) +
    geom_segment(aes(x = end_x_dat, y = end_y_dat,
                     xend = start_x_dat, yend = start_y_dat),
                 arrow = arrow_style, size = arrowsize,
                 color = "grey20") +
    theme_void()
  
  # Clusters + arrows
  datt1 = data.frame(time, loc1, loc2, clusterlabels = clusterlabels)
  
  p1 = ggplot(datt1, aes(x = loc1, y = loc2)) +
    geom_point(alpha = 1, size = pointsize, aes(color = clusterlabels)) +
    scale_colour_manual(values = color_in) +
    theme_void() +
    theme(plot.title = element_text(size = textsize, face = "bold"),
          text = element_text(size = textsize),
          legend.position = "bottom")
  
  p22 = p1 +
    geom_segment(data = datt2,
                 aes(x = start_x_dat, y = start_y_dat,
                     xend = end_x_dat, yend = end_y_dat),
                 arrow = arrow_style, size = arrowsize,
                 color = "grey20")
  
  p33 = p1 +
    geom_segment(data = datt2,
                 aes(x = end_x_dat, y = end_y_dat,
                     xend = start_x_dat, yend = start_y_dat),
                 arrow = arrow_style, size = arrowsize,
                 color = "grey20")
  
  return(list("Pseudotime"     = p01,
              "Arrowplot1"     = p02,
              "Arrowplot2"     = p03,
              "Arrowoverlay1"  = p22,
              "Arrowoverlay2"  = p33))
}

Plot Tissue Trajectories—- Extract spatial coordinates and pseudotime, then generate pseudotime and arrow-overlaid trajectory plots.

flocation <- GetTissueCoordinates(abnormalTissue)
pseudotime_traj1 = sim@colData@listData$slingPseudotime_1 
gridnum = 10
color_in = c("#E1BD6D","deepskyblue1","#7A3A9A","#ED0000FF","#0B775E","#ff00ff","black")
p_traj1 = plotTrajectory(pseudotime_traj1, flocation, abnormalTissue@meta.data[["coco_clusters"]], gridnum,
                         cols, pointsize=2.5 ,arrowlength=0.2, arrowsize=1, textsize=15)
p_traj1$Arrowoverlay1

p_traj1$Pseudotime

Visualization of pseudotime across the tissue revealed a clear spatial gradient of cellular progression. The Adenoma region (SD 5) exhibited higher pseudotime values, indicating that cells within this area are more differentiated and represent a later stage of tissue evolution compared to surrounding normal or transitional structures.